{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "79cb0a94-3cf4-42e1-b370-4f4e1679bc4b",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70b65fe2-bf20-43e4-87d9-3a1de33b02c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "import time\n",
    "import csv\n",
    "from pathlib import Path\n",
    "from datetime import datetime\n",
    "from dataclasses import dataclass\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05fabaea-39c1-4458-9293-187322e626ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "home = Path('...ADD_PATH...')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9452edf7-a2f0-4b24-8a82-9743ff36f83f",
   "metadata": {},
   "source": [
    "## Load harmful prompts (and suffle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d1b4051-153e-4086-bcf1-7d1de283d1f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "harmful_behaviour_data = home/'data/advbench_subset.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4f7ee8f-7a62-4cf9-b9cd-cdd1f22c2466",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_behaviour = pd.read_csv(harmful_behaviour_data)\n",
    "df_behaviour['attack_success_CRDHPW23'] = 0\n",
    "df_behaviour['attack_success_this_work_tot'] = 0\n",
    "df_behaviour = df_behaviour.reset_index().rename(columns={'index': 'behaviour_id'})\n",
    "df_behaviour = df_behaviour.set_index(['behaviour_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2b3d5be-a1d4-46b4-8f7e-3ba362d14b28",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(1234)\n",
    "df_behaviour = df_behaviour.sample(frac=1).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c77bff3b-593a-4254-960a-9867496829f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_behaviour.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "43c1c539-ff59-487d-9446-686b40df5604",
   "metadata": {},
   "source": [
    "# Create Results Folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ded3c881-c4d6-4601-b465-823c50d15a6c",
   "metadata": {},
   "outputs": [],
   "source": [
    "results_folder = f'simulation_results/simulation_results_{datetime.today().strftime(\"%Y_%m_%d\")}/'\n",
    "!mkdir -p \"$results_folder\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7824a69a-6558-4eea-a1d1-59722e4170a1",
   "metadata": {},
   "source": [
    "## Implementation of Tree of Attacks with Pruning (TAP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d69206a0-ae16-422c-bae4-69f965572d81",
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass\n",
    "class TAP:\n",
    "    branch_factor: int = 4\n",
    "    depth: int = 10\n",
    "    width: int = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "221dd35b-6b66-4541-824c-c7606f406910",
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_simulation(target='llama-2',\\\n",
    "                    evaluator='no-evaluator',\n",
    "                    attacker=\"vicuna-api-model\",\n",
    "                    attack=None,\\\n",
    "                    prompt_ids=range(10)):\n",
    "    \n",
    "    currentTime = datetime.now().strftime(\"%Y_%m_%d_at_%H_%M\")\n",
    "    \n",
    "    folder = f'target_{target}/evaluator_{evaluator}/'\\\n",
    "             + f'params_b{attack.branch_factor}_'\\\n",
    "             + f't{attack.depth}_'\\\n",
    "             + f'w{attack.width}' \n",
    "    \n",
    "    !mkdir -p \"$results_folder\"/\"$folder\"\n",
    "    folder = str(home/results_folder/folder) \n",
    "    \n",
    "    for ijk, i in tqdm(enumerate(prompt_ids)): \n",
    "        goal = df_behaviour.iloc[i]['goal']\n",
    "        target_str = df_behaviour.iloc[i]['target']\n",
    "        \n",
    "        !python3.10 main_TAP.py --attack-model \"$attacker\" --target-model \"$target\" --evaluator-model \"$evaluator\" \\\n",
    "                 --goal \"\\\"$goal\\\"\" --target-str \"\\\"$target_str\\\"\" \\\n",
    "                 --branching-factor \"$attack.branch_factor\" --width \"$attack.width\" --depth \"$attack.depth\" \\\n",
    "                 --store-folder \"$folder\" --iter-index \"$i\" \\\n",
    "                 >> \"$folder\"/iter_\"$i\"_datetime_\"$currentTime\" 2>&1 "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5f501d76-9e4f-4a43-9c34-6a5e4dd505bf",
   "metadata": {},
   "source": [
    "## Run Simulations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "750c664c-4f6c-4271-8cd6-65215a43ed9c",
   "metadata": {},
   "outputs": [],
   "source": [
    "attack = TAP(branch_factor=4, depth=10, width=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71b7ed7e-90f3-4088-b026-a5ff93d92c19",
   "metadata": {},
   "source": [
    "### Evaluator: GPT-4, Attacker: Vicuna-13B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25578b47-4221-44e6-bd79-c0ebdeac8bbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "evaluator = 'gpt-4'\n",
    "attacker = \"vicuna-api-model\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9bc19b5f-580c-475d-b016-1c1f7df59b5c",
   "metadata": {},
   "source": [
    "#### Target: Vicuna-13B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d493c01-4497-4b4e-89f3-695ce2a15a58",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_simulation(target='vicuna-api-model', \n",
    "               evaluator=evaluator,\n",
    "               attacker=attacker, \n",
    "               attack=attack, \n",
    "               prompt_ids=range(50))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c4d4670-13c0-474f-86ce-24702584f5c3",
   "metadata": {},
   "source": [
    "#### Target: Llama-2-Chat-7B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd6238c0-9bd3-4b58-ad19-30bae8b69e81",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_simulation(target='llama-2-api-model', \n",
    "               evaluator=evaluator,\n",
    "               attacker=attacker, \n",
    "               attack=attack, \n",
    "               prompt_ids=range(50))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9cfccf3f-3147-4fe1-9555-5f3533755cd4",
   "metadata": {},
   "source": [
    "#### Target: GPT-3.5-Turbo "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3c91d88-5939-4047-8690-a94957ba94e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_simulation(target='gpt-3.5-turbo',\n",
    "               evaluator=evaluator,\n",
    "               attacker=attacker, \n",
    "               attack=attack, \n",
    "               prompt_ids=range(50))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afa2f888-9ccb-4221-a5ba-268d2bc548c7",
   "metadata": {},
   "source": [
    "#### Target: GPT-4-Turbo "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abf3b1d2-81ef-47c7-8aab-c52eb5efdaf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_simulation(target='gpt-4-1106-preview', \n",
    "               evaluator=evaluator,\n",
    "               attacker=attacker, \n",
    "               attack=attack, \n",
    "               prompt_ids=range(50))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "53141b3e-74a5-4ced-86ee-8f067b0502c9",
   "metadata": {},
   "source": [
    "#### Target: PaLM-2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd4cd2c1-ce90-4b08-800f-f29f96eb658a",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_simulation(target='palm-2', \n",
    "               evaluator=evaluator,\n",
    "               attacker=attacker, \n",
    "               attack=attack, \n",
    "               prompt_ids=range(50))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
